/**********************************************************************
DO FILE SUMMARY

This file conducts the SIVESNU analysis on diabetes and prediabets.

David Flood
dcflood@umich.edu

Questions for group:

1. NS/NR coding as "no" -> coding to "0" in diabetes outcomes
2. Generating SES index -> any preferences? Use 5 components as in Pickens?
3. food insecurity scale -> 
**********************************************************************/

clear
cls
version 16	
set more off
capture log close
set showbaselevels
 
* Set graph options and scheme
set scheme s1color
graph set window fontface "TrebuchetMS"

* Set director
cd "/Users/dcflood/Library/CloudStorage/Dropbox-Personal/Guatemala/SIVESNU"

* Use the macro $S_DATE to save files with current date
global date : di %tdCCYY.NN.DD date("$S_DATE","DMY")
log using "Log files/log_${date}.log", replace

////////////////////////////////////////////////////////////////////////////////
/////////////////// GENERAL DATASET REVIEWING AND CLEANING /////////////////////
////////////////////////////////////////////////////////////////////////////////

* Loading dataset "CUESTIONARIO DE MUJER EN EDAD FERTIL (MEF)"
use "Microdata/2018/Datasets/sivesnu_gt18_mujer_publico.dta", clear

*** Merging variables from from the "hogar" dataset. ***

* rural/urban residence, household food insecurity, SES index

merge 1:1 nboletahogar using "Microdata/2018/Datasets/sivesnu_gt18_hogar_publico.dta", keepusing(ruralurban fies4 bienes_luz bienes_solar bienes_television bienes_refri bienes_telefonofija bienes_celular bienes_lavadora bienes_secadora bienes_microondas bienes_computadora bienes_radio bienes_aire_acond bienes_vent_cedazo)
tab _merge // all from master matched
drop if _merge == 2
drop _merge
	
////////////////////////////////////////////////////////////////////////////////
/////////////////// CLEANING KEY CHARACTERISTICS OF INTEREST ///////////////////
////////////////////////////////////////////////////////////////////////////////	
	
* Age
sum edad_mujer, d
mdesc edad_mujer

	* Generate age categories 1
	gen age_cat = .
	replace age_cat = 1 if inrange(edad_mujer,15,29)
	replace age_cat = 2 if inrange(edad_mujer,30,39)
	replace age_cat = 3 if inrange(edad_mujer,40,49)
		
	label variable age_cat "Age category"
	label define age_cat 1 "15-19 years" ///
		2 "20-24 years" ///
		3 "25-29 years" ///
		4 "30-34 years" ///
		5 "35-39 years" ///
		6 "40-44 years" ///
		7 "45-49 years", modify
	label values age_cat age_cat
	
	tab age_cat
	
	* Generate age categories 2
	gen age_cat2 = .
	replace age_cat2 = 1 if inrange(edad_mujer,15,29)
	replace age_cat2 = 2 if inrange(edad_mujer,30,39)
	replace age_cat2 = 3 if inrange(edad_mujer,40,49)
		
	label variable age_cat2 "Age category"
	label define age_cat2 1 "15-29 years" ///
		2 "30-39 years" ///
		3 "40-49 years", modify
	label values age_cat2 age_cat2
	
	tab age_cat2

* Ethnicity
tab grupoetnico_reportado, m // note this is self-reported, as opposed to observed ("grupoetnico", B003)

encode grupoetnico_reportado, gen(grupoetnico_reportado_encoded)
drop grupoetnico_reportado
label variable grupoetnico_reportado_encoded "Ethnicity"
label define grupoetnico_reportado_encoded 1 "Indigenous" 2 "Not indigenous", modify
label values grupoetnico_reportado_encoded grupoetnico_reportado_encoded

rename grupoetnico_reportado_encoded ethnicity
tab ethnicity

gen maya = .
replace maya = 0 if ethnicity == 2
replace maya = 1 if ethnicity == 1
tab maya

* Language
	/* From survey instrument: 01 Español 02 Kaqchikel 03 Queqchi 04 K'iche 05 Mam 06 Poqomchi 07 Tzu'utujil 08 Kanjobal
	09 Chorti 10 Pocomam 11 Ixil 12 Popti 13 Jacalteco 14 Aguacateco 96 Otro____ */

tab idioma_materno, m

	/*
	  Idioma de |
	   la madre |      Freq.     Percent        Cum.
	------------+-----------------------------------
			  1 |      1,291       72.98       72.98
			  2 |         54        3.05       76.03
			  3 |        152        8.59       84.62
			  4 |         98        5.54       90.16
			  5 |         62        3.50       93.67
			  7 |          4        0.23       93.89
			  8 |         25        1.41       95.31
			 11 |         21        1.19       96.50
			 12 |         11        0.62       97.12
			 14 |          6        0.34       97.46
			 96 |         21        1.19       98.64
			  . |         24        1.36      100.00
	------------+-----------------------------------
		  Total |      1,769      100.00
	*/

tab idioma_materno_otro

	/*
	Otro idioma de |
		  la madre |
	(especificado) |      Freq.     Percent        Cum.
	---------------+-----------------------------------
			  ACHI |          2        9.52        9.52
			   AXI |          1        4.76       14.29
			  Achi |          1        4.76       19.05
			  CHUJ |          3       14.29       33.33
	CHUJ COATANECO |          2        9.52       42.86
			  Chuj |          4       19.05       61.90
		 Cuataneco |          1        4.76       66.67
			  chuj |          7       33.33      100.00
	---------------+-----------------------------------
			 Total |         21      100.00

	*/

gen idioma_materno_dichotomized = .
replace idioma_materno_dichotomized = 0 if idioma_materno == 1
replace idioma_materno_dichotomized = 1 if ///
	idioma_materno == 2 | ///
	idioma_materno == 3 | ///
	idioma_materno == 4 | ///
	idioma_materno == 5 | ///
	idioma_materno == 7 | ///
	idioma_materno == 8 | ///
	idioma_materno == 11 | ///
	idioma_materno == 12 | ///
	idioma_materno == 14 | ///
	idioma_materno == 96
label variable idioma_materno_dichotomized "Maternal language, Mayan"
label define idioma_materno_dichotomized_lab ///
	1 "Mayan" ///
	0 "Spanish", modify //
label values idioma_materno_dichotomized idioma_materno_dichotomized_lab
tab idioma_materno_dichotomized, m

* Area of residence (from household merge above)
	
	/*
	From codebook:
	"1-Urban
	2-Rural"
	*/
	
tab ruralurban
codebook ruralurban
gen rural = .
replace rural = 1 if ruralurban == 2
replace rural = 0 if ruralurban == 1
drop ruralurban
tab rural, m
 
* Education
tab niveleducativomadre

//    Nivel educativo de la |
//                    madre |      Freq.     Percent        Cum.
// -------------------------+-----------------------------------
//                0-Ninguno |         14        0.92        0.92
//               1-Primaria |        817       53.50       54.42
//             2-Secundaria |        599       39.23       93.65
// 3-Superior/universitaria |         86        5.63       99.28
//         4-Alfabetizaci�n |         11        0.72      100.00
// -------------------------+-----------------------------------
//                    Total |      1,527      100.00


	* Dichotomized education to (1) primary or less vs. (2) secondary or more
	gen education = .
	replace education = 1 if niveleducativomadre == "0-Ninguno"
	replace education = 1 if niveleducativomadre == "1-Primaria"
	replace education = 1 if niveleducativomadre == "2-Secundaria"
	replace education = 2 if niveleducativomadre == "3-Superior/universitaria"
	replace education = 2 if niveleducativomadre == "4-Alfabetizaci�n"

	
	label variable education "Education"
	label define education ///
		1 "Primary or less" ///
		2 "Secondary or more", modify //
	label values education education
	tab education

* SES (from household merge above)
	/* David's notes for conducting PCA
	https://www.princeton.edu/~otorres/Factor.pdf
	https://sites.google.com/site/econometricsacademy/econometrics-models/principal-component-analysis
	https://www.youtube.com/watch?v=xNTsAVj0t7U
	https://bmjopen.bmj.com/content/bmjopen/9/7/e024515/DC1/embed/inline-supplementary-material-1.pdf?download=true
	*/

	foreach v of varlist bienes_luz bienes_solar bienes_television bienes_refri bienes_telefonofija bienes_celular bienes_lavadora bienes_secadora bienes_microondas bienes_computadora bienes_radio bienes_aire_acond bienes_vent_cedazo {
		tab `v', m
		encode `v', gen(`v'_encoded)
		drop `v'
		recode `v'_encoded (2=1) (1=0)
		rename `v'_encoded `v'
	}

	foreach v in luz solar television refri telefonofija celular lavadora secadora microondas computadora radio aire_acond vent_cedazo {
	rename bienes_`v' `v'
	}

	pca secadora luz solar television refri telefonofija celular lavadora secadora microondas computadora radio aire_acond vent_cedazo, mineigen(1) blanks(.3)

	predict ses_index, score
	sum ses_index, d
	
	xtile ses_tertile = ses_index [pw=pesomujer], nquant(3)
	
	label variable ses_tertile "SES tertile"
	label define ses_tertile ///
		1 "Low" ///
		2 "Medium" ///
		3 "High", modify //
	label values ses_tertile ses_tertile
	tab ses_tertile


* Health insurance status
tab seguro_igss seguro_noigss, m
recode seguro_igss (98=2) // Assume that unknown or not reported is "no"
recode seguro_noigss (98=2) // Assume that unknown or not reported is "no"

gen igss = .
replace igss = 0 if inlist(seguro_igss,3)
replace igss = 1 if inlist(seguro_igss,1,2)

gen private_insurance = .
replace private_insurance = 0 if inlist(seguro_noigss,2)
replace private_insurance = 1 if inlist(seguro_noigss,1)

gen any_health_insurance = .
replace any_health_insurance = 0 if !missing(igss) & !missing(private_insurance)
replace any_health_insurance = 1 if ///
	igss == 1 | /// afiliada or beneficiara in IGSS
	 private_insurance == 1

label variable any_health_insurance "Health insurance, any"

tab any_health_insurance
tab igss
tab private_insurance
	
* BMI category
encode imc_cat, gen(bmi_cat)
drop imc_cat
recode bmi (1=2)
label variable bmi_cat "BMI category"
codebook bmi_cat // note how the encoding changed up the numbers
label define bmi_cat ///
	2 "Normal or underweight" ///
	3 "Overweight" ///
	4 "Obese", modify //
label values bmi_cat bmi_cat
	
tab bmi_cat, m

* Smoking
tab fumado_vez, m
tab fumado_cuantos, m

	* Generate current smoking
	gen csmoke = .
	replace csmoke = 0 if fumado_vez != .
	replace csmoke = 1 if ///
		fumado_vez == 1 & ///
		(inrange(fumado_cuantos,1,100) | /// average in a day 1 or more in last month
		fumado_cuantos_otro == 1) // " FUMA OCASIONALMENTE"
	label variable csmoke "Currently smokes"
	tab csmoke


* Hypertension
sum presion_sistolica, d
mdesc presion_sistolica // 177 missing

sum presion_diastolica, d
mdesc presion_diastolica // 177 missing 

tab hipertension_med, m // oral med for blood pressure

	* Defining hypertension as SBP >140, DBP >90, or oral med
	gen clin_hypt = .
	replace clin_hypt = 0 if !missing(presion_sistolica) & !missing(presion_diastolica)
	replace clin_hypt = 1 if  ///
		inrange(presion_sistolica,130,240) | /// sbp criterion
		inrange(presion_diastolica,80,150) | /// dbop criterion
		hipertension_med == 1 // med criterion
	
	label variable clin_hypt "Hypertension"
	tab clin_hypt
	
* Family history
tab diabetes_padres, m
clonevar dm_fam_hist_new = diabetes_padres
recode dm_fam_hist_new (2=0)(98=0)
tab dm_fam_hist_new, m

* Prior diagnosis
tab diabetes_mujer, m
clonevar dm_diag_new = diabetes_mujer
recode dm_diag_new (2=0)(98=0)
tab dm_diag_new, m

* Duration of diabetes diagnsosis in years
tab diabetes_mes, m // months
tab diabetes_an, m // years

gen dm_duration = .
replace dm_duration = diabetes_mes/12 if diabetes_mes != .
replace dm_duration = diabetes_an if diabetes_an != .
tab dm_duration
sum dm_duration, d

* Insulin treatment for diabetes
tab diabetes_insulina, m
clonevar insulin_new = diabetes_insulina
recode insulin_new (2=0)(98=0)
replace insulin_new = 0 if dm_diag_new == 0 // assuming undiagnosed not using insulin
tab insulin_new, m

* Oral med treatment for diabetes
tab diabetes_meds, m
clonevar dm_med_new = diabetes_meds
recode dm_med_new (2=0)(98=0)
replace dm_med_new = 0 if dm_diag_new == 0 // assuming undiagnosed not using insulin
tab dm_med_new, m

* Any glucose-lowering
gen glucose_low_med = .
replace glucose_low_med = 0 if dm_med_new == 0 | insulin_new == 0
replace glucose_low_med = 1 if dm_med_new == 1 | insulin_new == 1
tab glucose_low_med, m

* Diet treatment for diabetes
tab diabetes_dieta, m
clonevar dm_diet_new = diabetes_dieta
recode dm_diet_new (2=0)(98=0)
replace dm_diet_new = 0 if dm_diag_new == 0 // assuming undiagnosed not using insulin
tab dm_diet_new, m

* Glucose lowering med or diet treatment
gen any_diabetes_treatment = .
replace any_diabetes_treatment = 0 if !missing(glucose_low_med) & !missing(dm_diet_new)
replace any_diabetes_treatment = 1 if glucose_low_med == 1 | dm_diet_new == 1
tab any_diabetes_treatment

* HbA1c
sum hba1c, d // this looks great, very clean
mdesc hba1c // though 12.3% missing

gen a1c_7 = 0 if !missing(hba1c)
replace a1c_7 = 1 if hba1c <7
tab a1c_7

gen a1c_8 = 0 if !missing(hba1c)
replace a1c_8 = 1 if hba1c <8
tab a1c_8

* Diabetes control
gen dm_control_cascade = 
replace dm_control_cascade = 0 if !missing(a1c_7) & !missing(any_diabetes_treatment)
replace dm_control_cascade = 1 if a1c_7 == 1 & any_diabetes_treatment == 1
tab dm_control_cascade

* BP control
gen bp_control_130_80 = 0 if !missing(presion_sistolica) & !missing(presion_diastolica)
replace bp_control_130_80 = 1 if presion_sistolica <130 & presion_diastolica <80

* Current smoking
gen current_smoking = .
replace current_smoking = 0 if fumado_vez == 2 // has never smoked
replace current_smoking = 0 if fumado_vez == 1 & fumado_cuantos_otro == 2 // has previously smoked but ya no
replace current_smoking = 0 if fumado_vez == 1 & fumado_cuantos_otro == 1 & fumado_cuantos == 0 // has previously smoked and occasionally smoked but 0 in the last 30 days

replace current_smoking = 1 if fumado_vez == 1 & fumado_cuantos_otro == 1 & inrange(fumado_cuantos,1,100) // has previously smoked and occasionally smokes and has smoked 1-100 cigareets in last 30 days
replace current_smoking = 1 if fumado_vez == 1 & fumado_cuantos_otro == 2 & inrange(fumado_cuantos,1,100) // has previously smoked and doesn't occasionally smokes but has smoked 1-100 cigareets in last 30 days
replace current_smoking = 1 if fumado_vez == 1 & fumado_cuantos_otro == 1 & fumado_cuantos == . // has previously smoked and occasionally smoke and missing how many cigareets in last 100 days

tab current_smoking

* Preparing for for age standardized estimates
	gen age_group = ""
	replace age_group = "0-4" if inrange(edad_mujer,0,4)
	replace age_group = "5-9" if inrange(edad_mujer,5,9)
	replace age_group = "10-14" if inrange(edad_mujer,10,14)
	replace age_group = "15-19" if inrange(edad_mujer,15,19)
	replace age_group = "20-24" if inrange(edad_mujer,20,24)
	replace age_group = "25-29" if inrange(edad_mujer,25,29)
	replace age_group = "30-34" if inrange(edad_mujer,30,34)
	replace age_group = "35-39" if inrange(edad_mujer,35,39)
	replace age_group = "40-44" if inrange(edad_mujer,40,44)
	replace age_group = "45-49" if inrange(edad_mujer,45,49)
	replace age_group = "50-54" if inrange(edad_mujer,50,54)
	replace age_group = "55-59" if inrange(edad_mujer,55,59)
	replace age_group = "60-64" if inrange(edad_mujer,60,64)
	replace age_group = "65-69" if inrange(edad_mujer,65,69)
	replace age_group = "70-74" if inrange(edad_mujer,70,74)
	replace age_group = "75-79" if inrange(edad_mujer,75,79)
	replace age_group = "80-84" if inrange(edad_mujer,80,84)
	replace age_group = "85-89" if inrange(edad_mujer,85,89)
	replace age_group = "90-94" if inrange(edad_mujer,90,94)
	replace age_group = "95-99" if inrange(edad_mujer,95,99)
	replace age_group = "100+" if edad_mujer>100

	merge m:1 age_group using "World standard population/WHO standard population.dta"

////////////////////////////////////////////////////////////////////////////////
/////////////////// GENERATING SAMPLE AND DEFINING DIABETES ////////////////////
////////////////////////////////////////////////////////////////////////////////

* Defining the overall sample
generate sample_overall = 0
replace sample_overall = 1 if hba1c != . & embarazada != 1 & !missing(dm_diag_new)
tab sample_overall, m
drop if sample_overall != 1 // ********** dropping

* Defining diabetes

// Definition is (1) elevated HbA1c OR (2) prior diabetes diagnosis
gen clin_dia = 0 if !missing(hba1c) & !missing(dm_diag_new)
replace clin_dia = 1 if ///
	glucose_low_med == 1 | /// prior diagnosis
	inrange(hba1c,6.5,20)
tab clin_dia, m
list clin_dia hba1c dm_diag_new if hba1c == . & dm_diag_new == 1 // only 3 records of diagnosed women with missing hba1c

////////////////////////////////////////////////////////////////////////////////
////////////////////////////////// SVYSET //////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////	

* svyset
svyset conglomerado[pw = pesomujer] || nboletahogar, singleunit(centered) 
	
////////////////////////////////////////////////////////////////////////////////
///////////////////// TABLE 1: Participant characteristics /////////////////////
////////////////////////////////////////////////////////////////////////////////

* Create the Excel doc
putexcel set "Putexcel tables/Table 1 Participant characteristics ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel B1 = "n"
putexcel C1 = "Weighted value"

putexcel A1 = "Characteristic"
putexcel A2 = "Age in years, median (IQR)"
putexcel A3 = "Ethnicity, % (95% CI)"
putexcel A4 = "Indigenous"
putexcel A5 = "Not indigenous"
putexcel A6 = "Maternal language, % (95% CI)"
putexcel A7 = "Spanish"
putexcel A8 = "Mayan"
putexcel A9 = "Residence, % (95% CI)"
putexcel A10 = "Urban"
putexcel A11 = "Rural"
putexcel A12 = "Education, % (95% CI)"
putexcel A13 = "Primary or less"
putexcel A14 = "Secondary or greater"
putexcel A15 = "Economic tertiles, % (95% CI)"
putexcel A16 = "Low"
putexcel A17 = "Medium"
putexcel A18 = "High"
putexcel A19 = "BMI, median (IQR)"
putexcel A20 = "BMI category, % (95% CI)"
putexcel A21 = "Underweight or normal weight (BMI <25.0)"
putexcel A22 = "Overweight (BMI 25.0 to 29.9)"
putexcel A23 = "Obese (BMI ≥30"

* Age in years, median (IQR)
sum edad_mujer if sample_overall == 1
matrix  table1_matrix = r(N) 
matlist table1_matrix

	local b = string(table1_matrix[1,1],"%9.0fc")
	putexcel B2 = "`b'"

sum edad_mujer if sample_overall == 1
epctile edad_mujer, p(25 50 75) svy subpop(sample_overall) 

	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,2],"%9.0f")
	local lci = string(table1_matrix[1,1],"%9.0f")
	local uci = string(table1_matrix[1,3],"%9.0f")
	putexcel C2 = "`b' (`lci' to `uci')"

* Ethnicity, % (95% CI)
tab ethnicity if sample_overall == 1, matcell(table1_matrix)
	
	matlist table1_matrix

	local b = string(`r(N)',"%9.0fc")
	putexcel B3 = "`b'"

	local b = string(table1_matrix[1,1],"%9.0fc")
	putexcel B4 = "`b'"
	
	local b = string(table1_matrix[2,1],"%9.0fc")
	putexcel B5 = "`b'"
	
svy, subpop(sample_overall): proportion ethnicity

	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,1]*100,"%9.1f")
	local lci = string(table1_matrix[5,1]*100,"%9.1f")
	local uci = string(table1_matrix[6,1]*100,"%9.1f")
	putexcel C4 = "`b' (`lci' to `uci')"

	local b = string(table1_matrix[1,2]*100,"%9.1f")
	local lci = string(table1_matrix[5,2]*100,"%9.1f")
	local uci = string(table1_matrix[6,2]*100,"%9.1f")
	putexcel C5 = "`b' (`lci' to `uci')"

* Maternal language
tab idioma_materno_dichotomized if sample_overall == 1, matcell(table1_matrix)

	matlist table1_matrix

	local b = string(`r(N)',"%9.0fc")
	putexcel B6 = "`b'"

	local b = string(table1_matrix[1,1],"%9.0fc")
	putexcel B7 = "`b'"
	
	local b = string(table1_matrix[2,1],"%9.0fc")
	putexcel B8 = "`b'"

svy, subpop(sample_overall): proportion idioma_materno_dichotomized
	
	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,1]*100,"%9.1f")
	local lci = string(table1_matrix[5,1]*100,"%9.1f")
	local uci = string(table1_matrix[6,1]*100,"%9.1f")
	putexcel C7 = "`b' (`lci' to `uci')"

	local b = string(table1_matrix[1,2]*100,"%9.1f")
	local lci = string(table1_matrix[5,2]*100,"%9.1f")
	local uci = string(table1_matrix[6,2]*100,"%9.1f")
	putexcel C8 = "`b' (`lci' to `uci')"

* Residence, % (95% CI)
tab rural if sample_overall == 1, matcell(table1_matrix)

	matlist table1_matrix

	local b = string(`r(N)',"%9.0fc")
	putexcel B9 = "`b'"

	local b = string(table1_matrix[1,1],"%9.0fc")
	putexcel B10 = "`b'"
	
	local b = string(table1_matrix[2,1],"%9.0fc")
	putexcel B11 = "`b'"

svy, subpop(sample_overall): proportion rural
	
	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,1]*100,"%9.1f")
	local lci = string(table1_matrix[5,1]*100,"%9.1f")
	local uci = string(table1_matrix[6,1]*100,"%9.1f")
	putexcel C10 = "`b' (`lci' to `uci')"

	local b = string(table1_matrix[1,2]*100,"%9.1f")
	local lci = string(table1_matrix[5,2]*100,"%9.1f")
	local uci = string(table1_matrix[6,2]*100,"%9.1f")
	putexcel C11 = "`b' (`lci' to `uci')"
	
* Education, % (95% CI)
tab education if sample_overall == 1, matcell(table1_matrix)

	matlist table1_matrix

	local b = string(`r(N)',"%9.0fc")
	putexcel B12 = "`b'"

	local b = string(table1_matrix[1,1],"%9.0fc")
	putexcel B13 = "`b'"
	
	local b = string(table1_matrix[2,1],"%9.0fc")
	putexcel B14 = "`b'"

svy, subpop(sample_overall): proportion education
	
	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,1]*100,"%9.1f")
	local lci = string(table1_matrix[5,1]*100,"%9.1f")
	local uci = string(table1_matrix[6,1]*100,"%9.1f")
	putexcel C13 = "`b' (`lci' to `uci')"

	local b = string(table1_matrix[1,2]*100,"%9.1f")
	local lci = string(table1_matrix[5,2]*100,"%9.1f")
	local uci = string(table1_matrix[6,2]*100,"%9.1f")
	putexcel C14 = "`b' (`lci' to `uci')"

* Economic tertiles, % (95% CI)
tab ses_tertile if sample_overall == 1, matcell(table1_matrix)

	matlist table1_matrix

	local b = string(`r(N)',"%9.0fc")
	putexcel B15 = "`b'"

	local b = string(table1_matrix[1,1],"%9.0fc")
	putexcel B16 = "`b'"
	
	local b = string(table1_matrix[2,1],"%9.0fc")
	putexcel B17 = "`b'"
	
	local b = string(table1_matrix[3,1],"%9.0fc")
	putexcel B18 = "`b'"

svy, subpop(sample_overall): proportion ses_tertile

	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,1]*100,"%9.1f")
	local lci = string(table1_matrix[5,1]*100,"%9.1f")
	local uci = string(table1_matrix[6,1]*100,"%9.1f")
	putexcel C16 = "`b' (`lci' to `uci')"

	local b = string(table1_matrix[1,2]*100,"%9.1f")
	local lci = string(table1_matrix[5,2]*100,"%9.1f")
	local uci = string(table1_matrix[6,2]*100,"%9.1f")
	putexcel C17 = "`b' (`lci' to `uci')"
	
	local b = string(table1_matrix[1,3]*100,"%9.1f")
	local lci = string(table1_matrix[5,3]*100,"%9.1f")
	local uci = string(table1_matrix[6,3]*100,"%9.1f")
	putexcel C18 = "`b' (`lci' to `uci')"

* BMI, median (IQR)
sum imc if sample_overall == 1
matrix  table1_matrix = r(N) 
matlist table1_matrix

	local b = string(table1_matrix[1,1],"%9.0fc")
	putexcel B19 = "`b'"

sum imc if sample_overall == 1
epctile imc, p(25 50 75) svy subpop(sample_overall) 
		
	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,2],"%9.1f")
	local lci = string(table1_matrix[1,1],"%9.1f")
	local uci = string(table1_matrix[1,3],"%9.1f")
	putexcel C19 = "`b' (`lci' to `uci')"
	
* BMI category, % (95% CI)
tab bmi_cat if sample_overall == 1, matcell(table1_matrix)

	local b = string(`r(N)',"%9.0fc")
	putexcel B20 = "`b'"

	local b = string(table1_matrix[1,1],"%9.0fc")
	putexcel B21 = "`b'"
	
	local b = string(table1_matrix[2,1],"%9.0fc")
	putexcel B22 = "`b'"
	
	local b = string(table1_matrix[3,1],"%9.0fc")
	putexcel B23 = "`b'"
	
svy, subpop(sample_overall): proportion bmi_cat

	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,1]*100,"%9.1f")
	local lci = string(table1_matrix[5,1]*100,"%9.1f")
	local uci = string(table1_matrix[6,1]*100,"%9.1f")
	putexcel C21 = "`b' (`lci' to `uci')"

	local b = string(table1_matrix[1,2]*100,"%9.1f")
	local lci = string(table1_matrix[5,2]*100,"%9.1f")
	local uci = string(table1_matrix[6,2]*100,"%9.1f")
	putexcel C22 = "`b' (`lci' to `uci')"
	
	local b = string(table1_matrix[1,3]*100,"%9.1f")
	local lci = string(table1_matrix[5,3]*100,"%9.1f")
	local uci = string(table1_matrix[6,3]*100,"%9.1f")
	putexcel C23 = "`b' (`lci' to `uci')"

////////////////////////////////////////////////////////////////////////////////
//////////////////// FIGURE 1: Diabetes prevalence /////////////////////////////
////////////////////////////////////////////////////////////////////////////////

* Step 1: Directly estimate the prevalence overall and by age groups -----------

* Overall
svy, subpop(sample_overall): proportion clin_dia
matlist r(table)
matrix prevalence_clin_dia = r(table)

* By age group
svy, subpop(sample_overall): proportion clin_dia, over(age_cat)
matlist r(table)
matrix prevalence_clin_dia_age_cat = r(table)

* Step 2: Save the directly estimated values into Excel ------------------------

putexcel set "Putexcel tables/Prevalence figure ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel A1 = "Age group"
putexcel A2 = "15-29"
putexcel A3 = "30-39"
putexcel A4 = "40-49"


putexcel B1 = "est_clin_dia"
putexcel C1 = "lci_clin_dia"
putexcel D1 = "uci_clin_dia"

* Exporting estimates

	* clin_dia
	matlist  prevalence_clin_dia_age_cat
	matrix matrix_sample_table = prevalence_clin_dia_age_cat
	
		* 15-29 years
		putexcel B2 = matrix_sample_table[1,4]
		putexcel C2 = matrix_sample_table[5,4]
		putexcel D2 = matrix_sample_table[6,4]
		
		* 30-39 years
		putexcel B3 = matrix_sample_table[1,5]
		putexcel C3 = matrix_sample_table[5,5]
		putexcel D3 = matrix_sample_table[6,5]
		
		* 40-49 years
		putexcel B4 = matrix_sample_table[1,6]
		putexcel C4 = matrix_sample_table[5,6]
		putexcel D4 = matrix_sample_table[6,6]

		
* Step 3: Import the directly estimated values, tinker, resave -----------------
frame create figure
frame change figure		
						
import excel "Putexcel tables/Prevalence figure ${date}.xlsx", sheet("Sheet1") firstrow clear

* Convert proportions to percents
foreach v of varlist est_clin_dia lci_clin_dia uci_clin_dia  {
	replace `v' = `v'*100
}

encode Agegroup, gen(age_group2)
drop Agegroup

* This produces the nice caption in the figure for overall prevalence
	matlist prevalence_clin_dia
	matrix matrix_sample_table = prevalence_clin_dia
			
	local b = string(matrix_sample_table[1,2]*100,"%9.1f")
	local lci = string(matrix_sample_table[5,2]*100,"%9.1f")
	local uci = string(matrix_sample_table[6,2]*100,"%9.1f")
	global diabetes_prevalence = "`b'% (95% CI: `lci' to `uci')"
	di "$diabetes_prevalence"

* Convert the age groups to the middle of the range
gen age = .
replace age = 22.5 if age_group2 == 1 // 15-29
replace age = 35 if age_group2 == 2 // 30-39
replace age = 45 if age_group2 == 3 // 40-49

* Saving the directly estimated results into a separate dataset
save "Temporary datasets/direct_estimation_prevalence_$date.dta", replace

* Step 4: Model using restricted cubic splines to give nice bounds -------------
frame change default

* Generate restricted cubic splines
mkspline age_spline = edad_mujer if sample_overall == 1, cubic nknots(3) displayknots
	
* Local macro for graph titles
// source code: https://www.stata.com/statalist/archive/2006-10/msg00909.html
// local vtext : variable label "title" 
	
	/*	See this links on how to do margins with restricted cubic splines:
		https://tinyurl.com/yd8ft82l   */
	
	foreach n of numlist 15(1)49 {
		local atspec`n'
		foreach v of varlist age_spline* {
			summ `v' if edad_mujer == `n'
			local atspec`n' `atspec`n'' `v' = `r(mean)'
			}
		di "`atspec`n'''"
		}

	tempfile margins
	svy, subpop(sample) : logistic clin_dia c.age_spline*
		
	margins, ///
		at(`atspec15') ///
		at(`atspec16') ///
		at(`atspec17') ///
		at(`atspec18') ///
		at(`atspec19') ///
		at(`atspec20') ///
		at(`atspec21') ///
		at(`atspec22') ///
		at(`atspec23') ///
		at(`atspec24') ///
		at(`atspec25') ///
		at(`atspec26') ///
		at(`atspec27') ///
		at(`atspec28') ///
		at(`atspec29') ///
		at(`atspec30') ///
		at(`atspec31') ///
		at(`atspec32') ///
		at(`atspec33') ///
		at(`atspec34') ///
		at(`atspec35') ///
		at(`atspec36') ///
		at(`atspec37') ///
		at(`atspec38') ///
		at(`atspec39') ///
		at(`atspec40') ///
		at(`atspec41') ///
		at(`atspec42') ///
		at(`atspec43') ///
		at(`atspec44') ///
		at(`atspec45') ///
		at(`atspec46') ///
		at(`atspec47') ///
		at(`atspec48') ///
		at(`atspec49') ///
		saving(`margins')
	
* Step 5: Merge the direct and modeled estimates -------------------------------	

	frame change figure
	use `margins', clear
	replace _ci_lb = 0 if _ci_lb <0 // This ensures none of the lower CI's are negative in margins
	gen modeled = 1 // this keeps things straight between the 2 methods
	rename _at1 age
		
append using "Temporary datasets/direct_estimation_prevalence_$date.dta"		
keep age _ci_lb _ci_ub _margin est_clin_dia lci_clin_dia uci_clin_dia modeled

replace est_clin_dia = _margin*100 if missing(est_clin_dia)
replace lci_clin_dia = _ci_lb*100 if missing(lci_clin_dia)
replace uci_clin_dia = _ci_ub*100 if missing(uci_clin_dia)
replace modeled = 0 if missing(modeled)
		
* Step 6: Graph the results ----------------------------------------------------
	
	* Local macros for text size
	local overall_text_size "3.4rs"	
	local axis_title_size "3.4rs"	
	local legend_text_size "3.0rs"	
	local axis_label_size "3.0rs"	
	
	twoway ///
		(rarea lci_clin_dia uci_clin_dia age if modeled == 1,   ///
			lcolor(white) ///
			fcolor(gs15))   ///
		/// (line est_clin_dia age if modeled == 1, ///
		///	color(gs8)) ///
		(rcap lci_clin_dia uci_clin_dia age if modeled == 0, ///
			lcolor(default)) ///
		(scatter est_clin_dia age if modeled == 0, connect(line) lcolor(black) ///
			mcolor(black) ///
			msize(1rs)), 	///
	note("Overall prevalence: $diabetes_prevalence", ring(0) pos(11) margin(l=4rs r=0rs t=2rs b=0) size(`overall_text_size') linegap(1.4) ) ///
	/// graph attributes
		xsize(5) ///
		ysize(4) ///
		xlabel(15(5)50, labsize(`axis_title_size'))  ///
		xscale(ra(15 50))   ///
		ylabel(0(10)30, labsize(`axis_title_size'))  ///
		yscale(r(0 30) lcolor(black) lwidth(none) titlegap(0))  ///
		legend( ///
			order(3 "Direct estimates with 95% CI" 1 "Modeled 95% CI ") ///
			rows(2) size(`legend_text_size') ring(1) position(6) bmargin(l=0 r=0 b=0 t=0) region(color(none) lwidth(none))) ///
		xtitle("Age (years)", size(`axis_title_size') color(black) margin(l=0 r=0 b=0 t=2rs)) ///
		ytitle("Prevalence (%)", size(`axis_title_size') color(black) margin(l=2rs r=2rs b=0 t=0)) ///
		plotregion(margin(l=0 r=0 b=0 t=0) lwidth(none) lalign(center))  ///
		graphregion(margin(l=0rs 5rs t=5rs b=2rs)) ///
		xscale(line ) yscale(line ) ///
		name(prevalence_figure, replace) //	
		
gr_edit plotregion1.AddTextBox added_text editor 4.3 20.5
gr_edit plotregion1.added_text_new = 1
gr_edit plotregion1.added_text_rec = 1
gr_edit plotregion1.added_text[1].style.editstyle  angle(default) size( sztype(relative) val(3.4) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
gr_edit plotregion1.added_text[1].text = {}
gr_edit plotregion1.added_text[1].text.Arrpush 15-29 years

gr_edit plotregion1.AddTextBox added_text editor 10.8 32
gr_edit plotregion1.added_text_new = 2
gr_edit plotregion1.added_text_rec = 2
gr_edit plotregion1.added_text[2].style.editstyle  angle(default) size( sztype(relative) val(3.4) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
gr_edit plotregion1.added_text[2].text = {}
gr_edit plotregion1.added_text[2].text.Arrpush 30-39 years

gr_edit plotregion1.AddTextBox added_text editor 20.2 42
gr_edit plotregion1.added_text_new = 3
gr_edit plotregion1.added_text_rec = 3
gr_edit plotregion1.added_text[3].style.editstyle  angle(default) size( sztype(relative) val(3.4) allow_pct(1)) color(black) horizontal(left) vertical(middle) margin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) linegap( sztype(relative) val(0) allow_pct(1)) drawbox(no) boxmargin( gleft( sztype(relative) val(0) allow_pct(1)) gright( sztype(relative) val(0) allow_pct(1)) gtop( sztype(relative) val(0) allow_pct(1)) gbottom( sztype(relative) val(0) allow_pct(1))) fillcolor(bluishgray) linestyle( width( sztype(relative) val(.2) allow_pct(1)) color(black) pattern(solid) align(inside)) box_alignment(east) editcopy
gr_edit plotregion1.added_text[3].text = {}
gr_edit plotregion1.added_text[3].text.Arrpush 40-49 years

		
graph save "Figures/prevalence_figure_$date.gph", replace         
graph export "Figures/prevalence_figure_$date.pdf", as(pdf) name(prevalence_figure) replace		
graph export "Figures/prevalence_figure_$date.png", as(png) name(prevalence_figure) replace		
		
frame change default
frame drop figure

////////////////////////////////////////////////////////////////////////////////
///////////////////// FIGURE 2: Prevalence by subgroups /////////////////////////
////////////////////////////////////////////////////////////////////////////////

/*******************************************************************************
Step 1: Outputting data for forest plot
*******************************************************************************/

* Create the Excel doc
putexcel set "Putexcel tables/Prevalence subgroup figure ${date}.xlsx", replace 
	   	   
* Make the table frame
putexcel B1 = "prev_est"
putexcel C1 = "prev_lci"
putexcel D1 = "prev_uci"
putexcel E1 = "abs_est"
putexcel F1 = "abs_lci"
putexcel G1 = "abs_uci"

putexcel A1 = "Characteristic"
putexcel A2 = "Age group in years"
putexcel A3 = "15-29 years"
putexcel A4 = "30-39 years"
putexcel A5 = "40-49 years"
putexcel A6 = "Ethnicity"
putexcel A7 = "Indigenous"
putexcel A8 = "Non-indigenous"
putexcel A9 = "Maternal language"
putexcel A10 = "Spanish"
putexcel A11 = "Mayan"
putexcel A12 = "Residence"
putexcel A13 = "Urban"
putexcel A14 = "Rural"
putexcel A15 = "Education"
putexcel A16 = "Primary or less"
putexcel A17 = "Secondary or greater"
putexcel A18 = "Economic tertiles"
putexcel A19 = "Low"
putexcel A20 = "Medium"
putexcel A21 = "High"
putexcel A22 = "BMI category"
putexcel A23 = "Underweight or normal weight (BMI <25.0)"
putexcel A24 = "Overweight (BMI 25.0 to 29.9)"
putexcel A25 = "Obese (BMI ≥30)"
putexcel A26 = "Overall"
	
/*******************************************************************************
Prevalences
*******************************************************************************/
		
* Age group in years

	svy, subpop(sample_overall): proportion clin_dia, over(age_cat)
	matrix figure2 = r(table)
	matlist figure2
	
	* Estimates

		* 15-29 years
		putexcel B3 = matrix(figure2[1,4])
		putexcel C3 = matrix(figure2[5,4])
		putexcel D3 = matrix(figure2[6,4])
			
		* 30-39 years
		putexcel B4 = matrix(figure2[1,5])
		putexcel C4 = matrix(figure2[5,5])
		putexcel D4 = matrix(figure2[6,5])
		
		* 40-49 years
		putexcel B5 = matrix(figure2[1,6])
		putexcel C5 = matrix(figure2[5,6])
		putexcel D5 = matrix(figure2[6,6])
		
	* Margins
	
		matrix list e(b)
		
		* 15-29 years
		lincom _b[1.clin_dia@1.age_cat] - _b[1.clin_dia@2.age_cat]
			
		putexcel E3 = `r(estimate)'
		putexcel F3 = `r(lb)'
		putexcel G3 = `r(ub)'
			
		* 30-39 years
		lincom _b[1.clin_dia@2.age_cat] - _b[1.clin_dia@2.age_cat]
			
		putexcel E4 = `r(estimate)'
		putexcel F4 = `r(lb)'
		putexcel G4 = `r(ub)'
		
		* 40-49 years
		lincom _b[1.clin_dia@3.age_cat] - _b[1.clin_dia@2.age_cat]
			
		putexcel E5 = `r(estimate)'
		putexcel F5 = `r(lb)'
		putexcel G5 = `r(ub)'
		
* Ethnicity

	svy, subpop(sample_overall): proportion clin_dia, over(ethnicity) stdize(age_group) stdweight(who_population)
	matrix figure2 = r(table)
	matlist figure2
	
	* Estimates

		* Indigenous
		putexcel B7 = matrix(figure2[1,3])
		putexcel C7 = matrix(figure2[5,3])
		putexcel D7 = matrix(figure2[6,3])
			
		* Non-indigenous
		putexcel B8 = matrix(figure2[1,4])
		putexcel C8 = matrix(figure2[5,4])
		putexcel D8 = matrix(figure2[6,4])
	
	* Margins
	
		matrix list e(b)
		
		* Indigenous
		lincom _b[1.clin_dia@1.ethnicity] - _b[1.clin_dia@1.ethnicity]
			
		putexcel E7 = `r(estimate)'
		putexcel F7 = `r(lb)'
		putexcel G7 = `r(ub)'
			
		* Non-indigenous
		lincom _b[1.clin_dia@2.ethnicity] - _b[1.clin_dia@1.ethnicity]
			
		putexcel E8 = `r(estimate)'
		putexcel F8 = `r(lb)'
		putexcel G8 = `r(ub)'

* Maternal language

	svy, subpop(sample_overall): proportion clin_dia, over(idioma_materno_dichotomized) stdize(age_group) stdweight(who_population)
	matrix figure2 = r(table)
	matlist figure2

	* Estimates

		* Spanish
		putexcel B10 = matrix(figure2[1,3])
		putexcel C10 = matrix(figure2[5,3])
		putexcel D10 = matrix(figure2[6,3])
		
		* Mayan
		putexcel B11 = matrix(figure2[1,4])
		putexcel C11 = matrix(figure2[5,4])
		putexcel D11 = matrix(figure2[6,4])
		
	* Margins
		
		matrix list e(b)
		
		* Spanish
		lincom _b[1.clin_dia@0.idioma_materno_dichotomized] - _b[1.clin_dia@0.idioma_materno_dichotomized]
			
		putexcel E10 = `r(estimate)'
		putexcel F10 = `r(lb)'
		putexcel G10 = `r(ub)'
		
		* Mayan
		lincom _b[1.clin_dia@1.idioma_materno_dichotomized] - _b[1.clin_dia@0.idioma_materno_dichotomized]
			
		putexcel E11 = `r(estimate)'
		putexcel F11 = `r(lb)'
		putexcel G11 = `r(ub)'
		
* Residence

	svy, subpop(sample_overall): proportion clin_dia, over(rural) stdize(age_group) stdweight(who_population)
	matrix figure2 = r(table)
	matlist figure2

	* Estimates
	
		* Urban
		putexcel B13 = matrix(figure2[1,3])
		putexcel C13 = matrix(figure2[5,3])
		putexcel D13 = matrix(figure2[6,3])

		* Rural
		putexcel B14 = matrix(figure2[1,4])
		putexcel C14 = matrix(figure2[5,4])
		putexcel D14 = matrix(figure2[6,4])

	* Margins
		
		matrix list e(b)
	
		* Urban
		lincom _b[1.clin_dia@0.rural] - _b[1.clin_dia@0.rural]
			
		putexcel E13 = `r(estimate)'
		putexcel F13 = `r(lb)'
		putexcel G13 = `r(ub)'
		
		* Rural
		lincom _b[1.clin_dia@1.rural] - _b[1.clin_dia@0.rural]
			
		putexcel E14 = `r(estimate)'
		putexcel F14 = `r(lb)'
		putexcel G14 = `r(ub)'
		
* Education

	svy, subpop(sample_overall): proportion clin_dia, over(education) stdize(age_group) stdweight(who_population)
	matrix figure2 = r(table)
	matlist figure2

	* Estimates

		* Primary or less
		putexcel B16 = matrix(figure2[1,3])
		putexcel C16 = matrix(figure2[5,3])
		putexcel D16 = matrix(figure2[6,3])
		
		* Secondary or greater
		putexcel B17 = matrix(figure2[1,4])
		putexcel C17 = matrix(figure2[5,4])
		putexcel D17 = matrix(figure2[6,4])

	* Margins
			
		matrix list e(b)

		* Primary or less
		lincom _b[1.clin_dia@1.education] - _b[1.clin_dia@1.education]
			
		putexcel E16 = `r(estimate)'
		putexcel F16 = `r(lb)'
		putexcel G16 = `r(ub)'

		* Secondary or greater
		lincom _b[1.clin_dia@2.education] - _b[1.clin_dia@1.education]
			
		putexcel E17 = `r(estimate)'
		putexcel F17 = `r(lb)'
		putexcel G17 = `r(ub)'
		
* Economic tertiles

	svy, subpop(sample_overall): proportion clin_dia, over(ses_tertile) stdize(age_group) stdweight(who_population)
	matrix figure2 = r(table)
	matlist figure2

	* Estimates

		* Low
		putexcel B19 = matrix(figure2[1,4])
		putexcel C19 = matrix(figure2[5,4])
		putexcel D19 = matrix(figure2[6,4])

		* Medium
		putexcel B20 = matrix(figure2[1,5])
		putexcel C20 = matrix(figure2[5,5])
		putexcel D20 = matrix(figure2[6,5])
		
		* High
		putexcel B21 = matrix(figure2[1,6])
		putexcel C21 = matrix(figure2[5,6])
		putexcel D21 = matrix(figure2[6,6])

	* Margins
			
		matrix list e(b)

		* Low
		lincom _b[1.clin_dia@1.ses_tertile] - _b[1.clin_dia@1.ses_tertile]
			
		putexcel E19 = `r(estimate)'
		putexcel F19 = `r(lb)'
		putexcel G19 = `r(ub)'
		
		* Medium
		lincom _b[1.clin_dia@2.ses_tertile] - _b[1.clin_dia@1.ses_tertile]
			
		putexcel E20 = `r(estimate)'
		putexcel F20 = `r(lb)'
		putexcel G20 = `r(ub)'

		* High
		lincom _b[1.clin_dia@3.ses_tertile] - _b[1.clin_dia@1.ses_tertile]
			
		putexcel E21 = `r(estimate)'
		putexcel F21 = `r(lb)'
		putexcel G21 = `r(ub)'
		
* BMI category

	svy, subpop(sample_overall): proportion clin_dia, over(bmi_cat) stdize(age_group) stdweight(who_population)
	matrix figure2 = r(table)
	matlist figure2
	
	* Estimates
	
		* Underweight or normal weight (BMI <25.0)
		putexcel B23 = matrix(figure2[1,4])
		putexcel C23 = matrix(figure2[5,4])
		putexcel D23 = matrix(figure2[6,4])
		
		* Overweight (BMI 25.0 to 29.9)
		putexcel B24 = matrix(figure2[1,5])
		putexcel C24 = matrix(figure2[5,5])
		putexcel D24 = matrix(figure2[6,5])
		
		* Obese (BMI ≥30)
		putexcel B25 = matrix(figure2[1,6])
		putexcel C25 = matrix(figure2[5,6])
		putexcel D25 = matrix(figure2[6,6])

* Margins
		
		matrix list e(b)

		* Underweight or normal weight (BMI <25.0)
		lincom _b[1.clin_dia@2.bmi_cat] - _b[1.clin_dia@3.bmi_cat]
			
		putexcel E23 = `r(estimate)'
		putexcel F23 = `r(lb)'
		putexcel G23 = `r(ub)'
		
		* Overweight (BMI 25.0 to 29.9)
		lincom _b[1.clin_dia@3.bmi_cat] - _b[1.clin_dia@3.bmi_cat]
			
		putexcel E24 = `r(estimate)'
		putexcel F24 = `r(lb)'
		putexcel G24 = `r(ub)'
		
		* Obese (BMI ≥30)
		lincom _b[1.clin_dia@4.bmi_cat] - _b[1.clin_dia@3.bmi_cat]
			
		putexcel E25 = `r(estimate)'
		putexcel F25 = `r(lb)'
		putexcel G25 = `r(ub)'
		
* Overall	

svy, subpop(sample_overall): proportion clin_dia
matrix figure2 = r(table)
matlist figure2
		
putexcel B26 = matrix(figure2[1,2])
putexcel C26 = matrix(figure2[5,2])
putexcel D26 = matrix(figure2[6,2])
		
/*******************************************************************************
Step 2: Importing and outputting data for forest plot
*******************************************************************************/		

frame create forestplot
frame change forestplot

* Load data
import excel "Putexcel tables/Prevalence subgroup figure ${date}.xlsx", sheet("Sheet1") firstrow clear

* Step 1: Add a new observation (a blank row)
insobs 1, after(4)
insobs 1, after(8)
insobs 1, after(12)
insobs 1, after(16)
insobs 1, after(20)
insobs 1, after(25)
insobs 1, after(30)

* See help for what each of the "_USE" categories refers to	
gen _USE = 1
replace _USE = 0 if inlist(_n,1,6,10,14,18,22,26)
replace _USE = 5 if inlist(_n,32)
replace _USE = 6 if Characteristic == ""

foreach var of varlist prev_est prev_lci prev_uci abs_est abs_lci abs_uci {
	replace `var' = `var'*100
}

* Making string variables	
gen prev_est_s = string(prev_est,"%9.1f")
gen prev_lci_s = string(prev_lci,"%9.1f")
gen prev_uci_s = string(prev_uci,"%9.1f")
gen output_prev_s = prev_est_s + " (" + prev_lci_s + " to " + prev_uci_s + ")"

gen abs_est_s = string(abs_est,"%9.1f")
gen abs_lci_s = string(abs_lci,"%9.1f")
gen abs_uci_s = string(abs_uci,"%9.1f")
gen output_abs_s = abs_est_s + " (" + abs_lci_s + " to " + abs_uci_s + ")"

replace output_prev_s = "" if prev_est == .
replace output_abs_s = "" if abs_est == .

replace output_abs_s = "Ref" if Characteristic == "30-39 years"
replace output_abs_s = "Ref" if Characteristic == "Indigenous"
replace output_abs_s = "Ref" if Characteristic == "Spanish"
replace output_abs_s = "Ref" if Characteristic == "Urban"
replace output_abs_s = "Ref" if Characteristic == "Primary or less"
replace output_abs_s = "Ref" if Characteristic == "Low"
replace output_abs_s = "Ref" if Characteristic == "Overweight (BMI 25.0 to 29.9)"

* Realiigning
leftalign

* Adding bolded labels		
label var Characteristic `"{bf:Characteristic}"'
label var output_prev_s `"`"{bf:Prevalence of}"' `"{bf:diabetes (%)}"'"'
label var output_abs_s `"`"{bf:Difference in}"' `"{bf:prevalence (%)}"'"'

replace Characteristic = "{bf:Age group}" if Characteristic == "Age group in years"
replace Characteristic = "{bf:Ethnicity}" if Characteristic == "Ethnicity"
replace Characteristic = "{bf:Maternal language}" if Characteristic == "Maternal language"
replace Characteristic = "{bf:Area of residence}" if Characteristic == "Residence"
replace Characteristic = "{bf:Education}" if Characteristic == "Education"
replace Characteristic = "{bf:Economic tertiles}" if Characteristic == "Economic tertiles"
replace Characteristic = "{bf:BMI category}" if Characteristic == "BMI category"
replace Characteristic = "{bf:Overall}" if Characteristic == "Overall"


// * Indicator variable needed to make symbols different
// gen plotid = 1 if label == "Women" | label == "Men"
// replace plotid = 2 if label == "Overall"

* Prevalence graph
forestplot prev_est prev_lci prev_uci, ///
	labels(Characteristic) ///
	/// eform ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output_prev_s) leftjustify nobox /// this tells forestplot which columsn to display
	range(0 25) /// range of the plot
	/// null(0) ///
	xlabel(0(10)20, labsize(2.3)) /// plot labels xmtick(0.3(0.1)1.5) ///
	xtitle("Prevalence of diabetes (%)", size(2.5) margin(l=30 r=0 b=0 t=1)) ///    
	yla(none) ///
	astext(80) ///
	spacing(1.2) /// this refers to spacing by line
	/// style options
		diamopts(lwidth(`line_size')) ///
		boxopts(mcolor(none)) ///
		ciopts(lwidth(`line_size')) ///
		/// nlineopts(lwidth(.2rs)) ///
		olineopts(lwidth(0)) ///
		pointopts(msymbol(square) msize(0.4rs)) ///
	plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
	graphregion(color(white) lcolor(white) margin(l=0 r=0 b=0 t=0)) ///
	aspect(1) savedims(A) ///
	name(forestplot_prev,replace) //
	
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy
	
* Margins graph
forestplot abs_est abs_lci abs_uci, ///
	/// labels(Characteristic) ///
	/// eform ///
	nostats /// this tells it not to calculate stats but rather use raw input data
	rcols(output_abs_s) leftjustify  nobox /// /// this tells forestplot which columsn to display
	range(-10 15) /// range of the plot
	/// null(0) ///
	xlabel(-10(10)10, labsize(2.3)) /// plot labels xmtick(0.3(0.1)1.5) ///
	xtitle("Difference in prevalence (%)", size(2.5) margin(l=0 r=0 b=0 t=1)) ///      
    yla(none) ///
	astext(60) ///
	spacing(1.2) /// this refers to spacing by line
	/// style options
		diamopts(lwidth(`line_size')) ///
		boxopts(mcolor(none)) ///
		ciopts(lwidth(`line_size')) ///
		/// nlineopts(lwidth(.2rs)) ///
		olineopts(lwidth(0)) ///
		pointopts(msymbol(square) msize(0.4rs)) ///
	plotregion(margin(l=0 r=0 b=0 t=0) lcolor(none)) ///
	graphregion(color(white) lcolor(white) margin(l=0 r=0 b=0 t=0)) ///
	usedims(A) ///
	/// xtitle("Adjusted odds ratio", size(2.3)  margin(l=0 r=64 b=0 t=2)) ///
	/// favours("Less rural     " "achievement      " # "     More rural" "     achievement", labsize(2.1rs) nosymmetric) ///
	name(forestplot_margins,replace) //
	
gr_edit plotregion1._xylines[1].style.editstyle linestyle(color(black)) editcopy

graph combine forestplot_prev forestplot_margins, graphregion(color(white) lcolor(white) margin(zero))	imargin(zero) ysize(4) name(forest_plot_subgroup, replace) // xsize(5.5)

gr_edit plotregion1.graph1.style.editstyle aspect_pos(west) editcopy
gr_edit plotregion1.graph2.style.editstyle aspect_pos(west) editcopy
gr_edit plotregion1.graph2.xoffset = -10

gr_edit plotregion1.graph2.yaxis1.style.editstyle title_gap(-2) editcopy

gr_edit plotregion1.graph1.xaxis1.title.xoffset = 2
gr_edit plotregion1.graph2.xaxis1.title.xoffset = -10

gr_edit plotregion1.graph1.xaxis1.title.style.editstyle size(2.2) editcopy
gr_edit plotregion1.graph2.xaxis1.title.style.editstyle size(2.2) editcopy
	
graph save "Figures/forest_plot_subgroup_$date.gph", replace         
graph export "Figures/forest_plot_subgroup_$date.pdf", as(pdf) name(forest_plot_subgroup) replace		
graph export "Figures/forest_plot_subgroup_$date.png", as(png) name(forest_plot_subgroup) replace			
	
frame change default
frame drop forestplot	

////////////////////////////////////////////////////////////////////////////////
///////////////////// TABLE 2: Diabetes characteristics /////////////////////
////////////////////////////////////////////////////////////////////////////////

* Create the Excel doc
putexcel set "Putexcel tables/Table 2 Diabetes characteristics ${date}.xlsx", replace 

* Make the table frame
putexcel A1 = "Characteristic"
putexcel B1 = "Weighted value"

putexcel A2 = "Diabetes awareness"
putexcel A3 = "Previously diagnosed by physician or other health professional, % (95% CI)"
putexcel A4 = "Duration of diabetes"
putexcel A5 = "Family history of diabetes, % (95% CI)"
putexcel A6 = "Use of glucose-lowering medications, % (95% CI)"
putexcel A7 = "Oral glucose-lowering medication"
putexcel A8 = "Insulin"
putexcel A9 = "Oral glucose-lowering medication or insulin"
putexcel A10 = "Receives dietary treatment for glucose control"
putexcel A11 = "Glycemic status"
putexcel A12 = "HbA1c, median (IQR)"
putexcel A13 = "HbA1c <7.0%, % (95% CI)"
putexcel A14 = "HbA1c <8.0%, % (95% CI)"
putexcel A15 = "Blood pressure status"
putexcel A16 = "Systolic blood pressure, median (IQR)"	
putexcel A17 = "Diastolic blood pressure, median (IQR)"
putexcel A18 = "Blood pressure controlled (<130/80 mmHg), % (95% CI)"
putexcel A19 = "Health insurance status, % (95% CI)"
putexcel A20 = "Any health insurance"
putexcel A21 = "Social security health insurance"
putexcel A22 = "Private health insurance"
putexcel A23 = "Current tobacco use, % (95% CI)"

* Diabetes awareness

	* Previously diagnosed by physician or other health professional, % (95% CI)

	svy, subpop(sample_overall if clin_dia == 1): proportion dm_diag_new

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B3 = "`b' (`lci' to `uci')"
	
	* Duration of diabetes

	epctile diabetes_an, p(25 50 75) svy subpop(sample_overall) // not including clin_dia here as there is an error 

	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,2],"%9.0f")
	local lci = string(table1_matrix[1,1],"%9.0f")
	local uci = string(table1_matrix[1,3],"%9.0f")
	putexcel B4 = "`b' (`lci' to `uci')"
	
* Family history of diabetes, % (95% CI)


	svy, subpop(sample_overall if clin_dia == 1): proportion dm_fam_hist_new

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B5 = "`b' (`lci' to `uci')"

* Use of glucose-lowering medications, % (95% CI)

	* Oral glucose-lowering medication
	
	svy, subpop(sample_overall if clin_dia == 1): proportion dm_med_new

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B7 = "`b' (`lci' to `uci')"

	* Insulin

	svy, subpop(sample_overall if clin_dia == 1): proportion insulin_new

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B8 = "`b' (`lci' to `uci')"
	
	* Oral glucose-lowering medication or insulin
	
	
	svy, subpop(sample_overall if clin_dia == 1): proportion glucose_low_med

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B9 = "`b' (`lci' to `uci')"
	
	* Receives dietary treatment for glucose control

	svy, subpop(sample_overall if clin_dia == 1): proportion dm_diet_new

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B10 = "`b' (`lci' to `uci')"
	
* Glycemic status
	
	* HbA1c, median (IQR)
	
	epctile hba1c, p(25 50 75) svy subpop(sample_overall if clin_dia == 1) 

	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,2],"%9.1f")
	local lci = string(table1_matrix[1,1],"%9.1f")
	local uci = string(table1_matrix[1,3],"%9.1f")
	putexcel B12 = "`b' (`lci' to `uci')"
	
	* HbA1c <7.0%, % (95% CI)

	svy, subpop(sample_overall if clin_dia == 1): proportion a1c_7

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B13 = "`b' (`lci' to `uci')"
	
	* HbA1c <8.0%, % (95% CI)

	svy, subpop(sample_overall if clin_dia == 1): proportion a1c_8

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B14 = "`b' (`lci' to `uci')"
	
* Blood pressure status

	* Systolic blood pressure, median (IQR)
	
	epctile presion_sistolica, p(25 50 75) svy subpop(sample_overall if clin_dia == 1) 

	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,2],"%9.0f")
	local lci = string(table1_matrix[1,1],"%9.0f")
	local uci = string(table1_matrix[1,3],"%9.0f")
	putexcel B16 = "`b' (`lci' to `uci')"

	* Diastolic blood pressure, median (IQR)
	
	epctile presion_diastolica, p(25 50 75) svy subpop(sample_overall if clin_dia == 1) 

	matlist r(table)
	matrix table1_matrix = r(table)
					
	local b = string(table1_matrix[1,2],"%9.0f")
	local lci = string(table1_matrix[1,1],"%9.0f")
	local uci = string(table1_matrix[1,3],"%9.0f")
	putexcel B17 = "`b' (`lci' to `uci')"
	
	* Blood pressure controlled (<130/80 mmHg), % (95% CI)

	svy, subpop(sample_overall if clin_dia == 1): proportion bp_control_130_80

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B18 = "`b' (`lci' to `uci')"

* Health insurance status, % (95% CI)

	* Any health insurance

	svy, subpop(sample_overall if clin_dia == 1): proportion any_health_insurance

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B20 = "`b' (`lci' to `uci')"
	
	* Social security health insurance

	svy, subpop(sample_overall if clin_dia == 1): proportion igss

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B21 = "`b' (`lci' to `uci')"
	
	* Private health insurance

	svy, subpop(sample_overall if clin_dia == 1): proportion private_insurance

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B22 = "`b' (`lci' to `uci')"
	
* Current tobacco use, % (95% CI)

	svy, subpop(sample_overall if clin_dia == 1): proportion current_smoking

	matlist r(table)
	matrix table_matrix = r(table)
					
	local b = string(table_matrix[1,2]*100,"%9.1f")
	local lci = string(table_matrix[5,2]*100,"%9.1f")
	local uci = string(table_matrix[6,2]*100,"%9.1f")
	putexcel B23 = "`b' (`lci' to `uci')"
	
////////////////////////////////////////////////////////////////////////////////
///////////////////// Figure 3: Diabetes care cascade //////////////////////////
////////////////////////////////////////////////////////////////////////////////	

* Step 1: Create the Excel doc -------------------------------------------------

putexcel set "Putexcel tables/Care cascade figure ${date}.xlsx", replace  	   
		   
* Make the table frame
putexcel A1 = "cascade_step"
putexcel A2 = "All diabetes"
putexcel A3 = "Diagnosed"
putexcel A4 = "Treated"
putexcel A5 = "Controlled"

putexcel B1 = "est"
putexcel C1 = "lci"
putexcel D1 = "uci"

putexcel B2 = 1

* Exporting estimates

  	* dm_diag_cascade
	
	svy, subpop(sample_overall if clin_dia == 1): proportion dm_diag_new
	
	matrix table_matrix = r(table)
					
	putexcel B3 = matrix(table_matrix[1,2])
	putexcel C3 = matrix(table_matrix[5,2])
	putexcel D3 = matrix(table_matrix[6,2])
	
	* dm_treated_cascade
	
	svy, subpop(sample_overall if clin_dia == 1): proportion any_diabetes_treatment
	
	matrix table_matrix = r(table)
					
	putexcel B4 = matrix(table_matrix[1,2])
	putexcel C4 = matrix(table_matrix[5,2])
	putexcel D4 = matrix(table_matrix[6,2])
	
	* dm_control_cascade
	svy, subpop(sample_overall if clin_dia == 1): proportion dm_control_cascade
	
	matrix table_matrix = r(table)
					
	putexcel B5 = matrix(table_matrix[1,2])
	putexcel C5 = matrix(table_matrix[5,2])
	putexcel D5 = matrix(table_matrix[6,2])
	
* Step 2: Graph the data -------------------------------------------------------
frame create figure
frame change figure		
						
import excel "Putexcel tables/Care cascade figure ${date}.xlsx", sheet("Sheet1") firstrow clear

* Convert proportions to percents
foreach v of varlist est lci uci {
	replace `v' = `v'*100
}

gen cascade_step2 = .
replace cascade_step2 = 1 if cascade_step == "All diabetes"
replace cascade_step2 = 2 if cascade_step == "Diagnosed"
replace cascade_step2 = 3 if cascade_step == "Treated"
replace cascade_step2 = 4 if cascade_step == "Controlled"

	label variable cascade_step2 "Cascade step"
	label define cascade_step2 ///
		1 "All diabetes" ///
		2 "Diagnosed" ///
		3 "Treated" ///
		4 "Controlled", modify
	label values cascade_step2 cascade_step2

	drop cascade_step

twoway ///
	bar est cascade_step2 if cascade_step == 1, ///
		fcolor(gs13) ///
		lcolor(black) ///
		msize(1rs)	///	
		barw(0.6) || ///
	rspike lci uci cascade_step2 if cascade_step == 1, ///
		lcolor(black) ///
		msize(1rs)		|| ///
	bar est cascade_step2 if cascade_step == 2, ///
		fcolor(gs13) ///
		lcolor(black) ///
		msize(1rs)	///	
		barw(0.6) || ///
	rspike lci uci cascade_step2 if cascade_step == 2, ///
		lcolor(black) ///
		msize(1rs)		|| ///
	bar est cascade_step2 if cascade_step == 3, ///
		fcolor(gs13) ///
		lcolor(black) ///
		msize(1rs)	///	
		barw(0.6) || ///
	rspike lci uci cascade_step2 if cascade_step == 3, ///
		lcolor(black) ///
		msize(1rs)		|| ///
	bar est cascade_step2 if cascade_step == 4, ///
		fcolor(gs13) ///
		lcolor(black) ///
		msize(1rs)	///	
		barw(0.6) || ///
	rscap lci uci cascade_step2 if cascade_step == 4, ///
		lcolor(black) ///
		msize(1rs)	 ///
		///
	/// graph attributes
		xsize(5) ///
		ysize(4) ///
		xlabel(1 2 3 4, valuelabel) yscale(r(0(25)100) lcolor(black) lwidth(none) titlegap(0))  ///
		legend(off) ///
		xtitle("Cascade step", size(4rs) color(black) margin(medsmall)) ///
		ytitle("Percent (%)", size(4rs) color(black) margin(medsmall)) ///
		plotregion(margin(l=5rs r=5rs b=0 t=0) lwidth(none) lalign(center))  ///
		graphregion(margin(l=3rs r=3rs t=8rs b=3rs)) ///
		xscale(line ) yscale(line ) ///
		name(cascade_figure, replace) //	
		
graph save "Figures/cascade_figure_$date.gph", replace         
graph export "Figures/cascade_figure_$date.pdf", as(pdf) name(cascade_figure) replace
graph export "Figures/cascade_figure_$date.png", as(png) name(cascade_figure) replace

frame change default
frame drop figure


* sensitivity analysis
tab dm_diag_new glucose_low_med, m

gen clin_dia2 = 0 if !missing(hba1c) & !missing(dm_diag_new)
replace clin_dia2 = 1 if ///
	glucose_low_med == 1 | /// prior diagnosis
	inrange(hba1c,6.5,20)

svy, subpop(sample_overall): proportion clin_dia
svy, subpop(sample_overall): proportion clin_dia2
